################################################################################
## setup
################################################################################
# clean slate
rm(list = ls())
date()

# load packages
pkg <- c("tidyverse", 
         "gridExtra", 
         "lfe", 
         "stargazer", 
         "rms")
lapply(pkg, require, character.only = TRUE)

# set directory
MAIN_DIR <- "~/Dropbox/Research/Liao_Newman_Malhotra/replication/"

# set file directories
FIG_DIR <- paste(MAIN_DIR, "figures/", sep = "")
TABLE_DIR <- paste(MAIN_DIR, "tables/", sep = "")
RESULTS <- paste(MAIN_DIR, "results/", sep = "")

# create folder if doesn't exist
if (!dir.exists(FIG_DIR)) {
  print("Directory does not exist! Creating...")
  dir.create(FIG_DIR)
}
if (!dir.exists(TABLE_DIR)) {
  print("Directory does not exist! Creating...")
  dir.create(TABLE_DIR)
}
if (!dir.exists(RESULTS)) {
  print("Directory does not exist! Creating...")
  dir.create(RESULTS)
}

# load data
load(file = paste(MAIN_DIR, "cmps-merge.RData", sep = ""))

# set dataframe
df <- cmps.merge

df.2008 <- df %>%
  filter(year == 2008)

df.2012 <- df %>%
  filter(year == 2012)


################################################################################
## Table S2. The Presence of Chinese International Students and the 
## Perceived Neighborhood Prevalence of Asians, 2008
################################################################################
# Column (1)
ols.2008.b <- felm(asian_neighborhood_tri ~ log_china_undergraduate 
                   | 0 | 0 | 0, # FEs | IVs | Clustered SEs
                   data = df.2008,
                   keepX = TRUE)

summary(ols.2008.b)

# Column (2)
ols.2008.e <- felm(asian_neighborhood_tri ~ log_china_undergraduate + 
                     homeowner + age + edu + white + income
                   | 0 | 0 | 0, # FEs | IVs | Clustered SEs
                   data = df.2008,
                   keepX = TRUE)
                    
summary(ols.2008.e)

# Column (3)
ols.2008.f <- felm(asian_neighborhood_tri ~ log_china_undergraduate + 
                     homeowner + age + edu + white + income +
                     median_household_income_zcta + ln_pop_density_zcta + share_edu_ba_above
                   | 0 | 0 | 0, # FEs | IVs | Clustered SEs
                   data = df.2008,
                   keepX = TRUE)
                    
summary(ols.2008.f)

# effect size
coef(ols.2008.f)["log_china_undergraduate"]/sd(df.2008$asian_neighborhood_tri,
                                              na.rm = TRUE)

# Column (4)
ologit.2008 <- lrm(asian_neighborhood_tri ~ log_china_undergraduate + 
                     homeowner + age + edu + white + income +
                     median_household_income_zcta + ln_pop_density_zcta + share_edu_ba_above,
                   x = TRUE, y = TRUE,
                   data = df.2008)
                  
print(ologit.2008)

# collect results
models.2008 <- list(ols.2008.b, ols.2008.e, ols.2008.f, ologit.2008)

# set pvalues
pvalue.2008 <- list(round(ols.2008.b$pval, 3),
                    round(ols.2008.e$pval, 3),
                    round(ols.2008.f$pval, 3),
                    round(anova(ologit.2008)[,"P"], 3))

# set CIs
ci.2008 <- list(confint(ols.2008.b),
                confint(ols.2008.e),
                confint(ols.2008.f),
                confint.default(ologit.2008))

vars.order <- c("^log_china_undergraduate$",
                "^homeowner$",
                "^age$",
                "^edu$",
                "^white$",
                "^income$",
                "^median_household_income_zcta$",
                "^ln_pop_density_zcta$",
                "^share_edu_ba_above$",
                "^y> =1$",
                "^y> =2$")

vars.names <- c("Log Chinese Undergraduate Population",
                "Homeowner", 
                "Age", 
                "Education", 
                "White", 
                "Income",
                "ZIP-Code Median Household Income", 
                "ZIP-Code Population Density", 
                "ZIP-Code Share of Pop. with a BA Degree or Higher",
                "y>=1",
                "y>=2")

# save results
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above.
sink(file.path(TABLE_DIR, "Table-S2-asian-neighborhood-2008.txt"))
#sink(file.path(TABLE_DIR, "Table-S2-asian-neighborhood-2008.tex"))
stargazer(models.2008,
          #p = pvalue.2008, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.2008,
          report = ("vcsp"),
          digits = 3, 
          type = "text",
          #type = "latex",
          label = "tb:asian-neighborhood-2008",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "The Presence of Chinese International Students and the Perceived Neighborhood Prevalence of Asians, 2008",
          dep.var.caption = "2008 CMPS Respondent's Perception of an Asian Neighborhood",
          dep.var.labels = c("Ordinal (2 = Entirely, 1 = Mostly, 0 = Partially)"),
          column.labels = c("OLS", "OLS", "OLS", "Ordered Logit"),
          model.numbers = TRUE,
          model.names = FALSE,
          order = vars.order,
          covariate.labels = vars.names,
          omit.stat = c("f",
                        "ser"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented."),
          notes.align = "l",
          notes.append = FALSE)
sink()


################################################################################
## Table S3. The Presence of Chinese International Students and the 
## Perceived Neighborhood Prevalence of Asians, 2012
################################################################################
# Column (1)
ols.2012.b <- felm(asian_neighborhood_tri ~ log_china_undergraduate 
                  | 0 | 0 | 0, # FEs | IVs | Clustered SEs
                  data = df.2012,
                  keepX = TRUE)
summary(ols.2012.b)

# Column (2)
ols.2012.e <- felm(asian_neighborhood_tri ~ log_china_undergraduate + 
                    homeowner + age + edu + white + income
                  | 0 | 0 | 0, # FEs | IVs | Clustered SEs
                  data = df.2012,
                  keepX = TRUE)
summary(ols.2012.e)

# Column (3)
ols.2012.f <- felm(asian_neighborhood_tri ~ log_china_undergraduate + 
                    homeowner + age + edu + white + income +
                    median_household_income_zcta + ln_pop_density_zcta + share_edu_ba_above
                  | 0 | 0 | 0, # FEs | IVs | Clustered SEs
                  data = df.2012,
                  keepX = TRUE)
summary(ols.2012.f)

coef(ols.2012.f)["log_china_undergraduate"]/sd(df.2012$asian_neighborhood_tri, na.rm = TRUE)

# Column (4)
ologit.2012 <- lrm(asian_neighborhood_tri ~ log_china_undergraduate + 
                  homeowner + age + edu + white + income +
                  median_household_income_zcta + ln_pop_density_zcta + share_edu_ba_above,
                x = TRUE, y = TRUE,
                data = df.2012)
print(ologit.2012)

# collect results
models.2012 <- list(ols.2012.b, 
                    ols.2012.e, 
                    ols.2012.f, 
                    ologit.2012)

# set p-values
pvalue.2012 <- list(round(ols.2012.b$pval, 3),
                    round(ols.2012.e$pval, 3),
                    round(ols.2012.f$pval, 3),
                    round(anova(ologit.2012)[,"P"], 3))

# set CIs
ci.2012 <- list(confint(ols.2012.b),
                confint(ols.2012.e),
                confint(ols.2012.f),
                confint.default(ologit.2012))

vars.order <- c("^log_china_undergraduate$",
                "^homeowner$",
                "^age$",
                "^edu$",
                "^white$",
                "^income$",
                "^median_household_income_zcta$",
                "^ln_pop_density_zcta$",
                "^share_edu_ba_above$",
                "^y> =1$",
                "^y> =2$")

vars.names <- c("Log Chinese Undergraduate Population",
                "Homeowner", 
                "Age", 
                "Education", 
                "White", 
                "Income",
                "ZIP-Code Median Household Income", 
                "ZIP-Code Population Density", 
                "ZIP-Code Share of Pop. with a BA Degree or Higher",
                "y>=1",
                "y>=2")

# save results
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above.
sink(file.path(TABLE_DIR, "Table-S3-asian-neighborhood-2012.txt"))
#sink(file.path(TABLE_DIR, "Table-S3-asian-neighborhood-2012.tex"))
stargazer(models.2012,
          #p = pvalue.2012, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.2012,
          report = ("vcsp"),
          digits = 3, 
          type = "text",
          #type = "latex",
          label = "tb:asian-neighborhood-2012",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "The Presence of Chinese International Students and the Perceived Neighborhood Prevalence of Asians, 2012",
          dep.var.caption = "2012 CMPS Respondent's Perception of an Asian Neighborhood",
          dep.var.labels = c("Ordinal (2 = Entirely, 1 = Mostly, 0 = Partially)"),
          column.labels = c("OLS", "OLS", "OLS", "Ordered Logit"),
          model.numbers = TRUE,
          model.names = FALSE,
          order = vars.order,
          covariate.labels = vars.names,
          omit.stat = c("f",
                        "ser"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"),
          notes = c("95\\% confidence intervals and p-values are presented."),
          notes.align = "l",
          notes.append = FALSE)
sink()

